Table of Contents
- 1 Sec. 2.2
- 2 Sec. 2.4
- 3 Sec. 2.5
- 4 Sec. 2.6
- 5 Sec. 2.7
- 6 Sec. 2.8
- 7 Sec. 2.9
- 8 Sec. 2.10
- 9 Sec. 2.11
# install.packages("bizdays")
# install.packages("RQuantLib")
# install.packages("fUnitRoots")
# install.packages("TSA")
# install.packages("devtools")
# install.packages("Dict")
# install.packages("devtools")
# install.packages("Dict")
# devtools::create("AFTSCode")
# Attach required packages. Each package gets its own library() call:
# library()'s second positional argument is `help`, not another package
# name, so the original `library(bizdays, RQuantLib)` silently attached
# only bizdays and never loaded RQuantLib.
library(fUnitRoots) # adfTest
library(bizdays)
library(RQuantLib) # financial calendars for bizdays
library(TSA) # eacf
library(forecast)
print(packageVersion("bizdays"))
print(packageVersion("RQuantLib"))
library(knitr)
library(IRdisplay)
library(fBasics) # basicStats
library(Dict)
library(sandwich) # Use heteroskedasticity-consistent (HC) variance estimation
library(lmtest) # Apply HC variance to linear models
library(tseries) # Jarque-Bera test
# detach("package:AFTSCode", unload = TRUE) # First detach the package
# unloadNamespace("AFTSCode")
library(AFTSCode) # local helper package: plot_time_fig, plot_pacf_acf, etc.
# find("plot_pacf_acf"); find("draw_arima_forecast_fig")
# find("plot_pacf_acf"); find("draw_arima_forecast_fig")
# install_redir_output <- function(lib, log_fname) {
# # Open a file connection for writing
# sink(log_fname, append = TRUE)
# # Install packages (output will be redirected to the file)
# install.packages(lib)
# # Close the file connection
# sink()
# }
# plot_time_fig <- function(ts, main, xlab, ylab) {
# par(bg = "white")
# plot(ts, type = 'l', main = main, xlab = xlab, ylab = ylab)
# points(ts, pch = '*')
# }
# cal_phi_0 <- function(arima_md, ord, digits=6) {
# phi_0 = as.numeric((1-sum(arima_md$coef[1:ord]))*arima_md$coef['intercept'])
# print(format(phi_0, digits = digits))
# phi_0
# }
# get_mu <- function(arima_mod, digits=6) {
# mu = as.numeric(arima_mod$coef['intercept'])
# print(format(mu, digits = digits))
# mu
# }
# plot_arima_forecast_fig <- function(
# da_ts, eotr, h, npts, frequency,
# order, fixed, method, transform.pars,
# main, xlab, ylab, ylim = NULL
# ) {
# par(bg = "white")
# # arima model
# tr_da_ts = ts(da_ts[1:eotr], frequency = frequency, start = start(da_ts))
# if (is.null(transform.pars)) {
# ts_fm3 = arima(tr_da_ts, order = order, fixed = fixed, method = method)
# } else {
# ts_fm3 = arima(tr_da_ts, order = order, fixed = fixed, method = method, transform.pars = transform.pars)
# }
# print(ts_fm3$nobs)
# # Forecast
# ts_fm3$x <- tr_da_ts # https://stackoverflow.com/a/42464130/4307919
# ts_fc_res = forecast(ts_fm3, h = h)
# # Plot forecast
# xmin = time(da_ts)[eotr]-npts/frequency
# # xmax = end(da_ts)[1]+(end(da_ts)[2]+2)/frequency
# xmax = time(da_ts)[eotr]+(max(h, length(da_ts)-eotr)+1)/frequency
# cat(xmin, ";", xmax)
# plot(ts_fc_res, xlim = c(xmin, xmax), ylim = ylim, main = main, xlab = xlab, ylab = ylab)
# # Plot forecast mean
# dummy_1st_fmean_ts = ts(c(c(da_ts[eotr]), as.numeric(ts_fc_res$mean)), frequency = frequency, start = end(tr_da_ts))
# lines(dummy_1st_fmean_ts)
# points(dummy_1st_fmean_ts, pch = 1)
# # Plot confidence interval
# dummy_1st_flower_ts = ts(c(c(da_ts[eotr]), as.numeric(ts_fc_res$lower[,2])), frequency = frequency, start = end(tr_da_ts))
# dummy_1st_fupper_ts = ts(c(c(da_ts[eotr]), as.numeric(ts_fc_res$upper[,2])), frequency = frequency, start = end(tr_da_ts))
# lines(dummy_1st_flower_ts, lty=2)
# lines(dummy_1st_fupper_ts, lty=2)
# # Plot original data
# orig_plot_ts = ts(da_ts[(eotr-npts+1):length(da_ts)], frequency = frequency, start = time(da_ts)[eotr]-(npts-1)/frequency)
# lines(orig_plot_ts)
# points(orig_plot_ts, pch = 19)
# ts_fc_res
# }
# comb_forecast_res <- function(forecast_obj, da_ts, eotr, freq) {
# display(summary(forecast_obj))
# fc_std_err = (forecast_obj$upper[,2]-forecast_obj$lower[,2])/2/qnorm(p = 0.975)
# actual_ts = ts(da_ts[(eotr+1):length(da_ts)], frequency = freq, start = time(da_ts)[eotr+1])
# display(forecast_obj$mean); display(fc_std_err); display(actual_ts)
# multistep_ahead_forecast_tb = cbind(forecast_obj$mean, fc_std_err, actual_ts)
# dimnames(multistep_ahead_forecast_tb)[[2]] <- c("Forecast", "Std. Error", "Actual")
# multistep_ahead_forecast_tb
# }
# plot_unit_root_figs <- function(da_ts, freq, start, lag_max, main, xlab, ylab) {
# par(bg = "white")
# # lgdp_ts = ts(lgdp, frequency = 4, start = c(1947 ,1))
# diff_da_ts = ts(diff(da_ts), frequency = freq, start = start)
# plot(da_ts, col = 'black', type = 'l', main = main, xlab = xlab, ylab = ylab)
# acf(da_ts, lag.max = 20)
# plot_time_fig(diff_da_ts, main = main, xlab = xlab, ylab = paste("diff(", ylab, ")", sep = ""))
# pacf(diff_da_ts, lag.max = 20)
# }
# plot_acf <- function(da, lag.max = NULL, main=NULL, w=NULL, h=NULL, ...) {
# if (is.null(w) | is.null(h)) {
# par(bg = 'white')
# } else {
# par(bg = 'white', pin = c(w, h))
# }
# plot(acf(da, lag.max = lag.max, plot = F, `drop.lag.0` = F, ...), main = main)
# }
# plot_pacf_acf <- function(da, lag.max = NULL, main=NULL, w=NULL, h=NULL, ...) {
# if (is.null(w) | is.null(h)) {
# par(mfrow = c(2, 1), bg = 'white')
# } else {
# par(mfrow = c(2, 1), bg = 'white', pin = c(w, h))
# }
# plot(pacf(da, lag.max = lag.max, plot = F, `drop.lag.0` = F, ...), main = main)
# plot(acf(da, lag.max = lag.max, plot = F, `drop.lag.0` = F, ...), main = main)
# }
Sec. 2.2¶
Box-Ljung test (IBM and CRSP Value-Weighted Index)¶
- IBM monthly returns shows almost no serial correlation.
- CRSP Value-Weighted Index monthly returns shows medium momentum at lag-1 and medium reverting at lag-3.
# Monthly IBM and CRSP index returns (file name suggests the 1926-2008 sample).
da = read.table("../AFTS_data/Ch02/m-ibm3dx2608.txt", header=T)
da[1:5,]
# Show which attached package provides acf()
find("acf")
# IBM (Fig. 2.1)
plot_pacf_acf(da$ibmrtn, lag.max = 100)
# CRSP Value-Weighted Index (Fig. 2.2)
plot_pacf_acf(da$vwrtn, lag.max = 100)
# Ljung-Box test of the first 5 autocorrelations of IBM simple returns
Box.test(da$ibmrtn, lag = 5, type = 'Ljung')
# Repeat the test on IBM log returns
libm = log(da$ibmrtn+1)
Box.test(libm, lag = 5, type = 'Ljung')
Find IBM monthly and yearly expected return¶
- For the `log(Return)` series:
  - $\mu_{monthly}\approx 0.010891$
  - $\sigma_{monthly}\approx 0.070333$
# µ, sigma, and monthly price ratio
# (lognormal identity: E[exp(r)] = exp(mu + sigma^2/2))
ibm_basic_stats <- apply(as.data.frame(libm), 2, basicStats)
ibm_basic_stats$libm['Mean',];
ibm_basic_stats$libm['Stdev',];
# Expected monthly gross return; ^12 annualizes it
mnthly_ibm_pr_ratio = exp(ibm_basic_stats$libm['Mean',]+ibm_basic_stats$libm['Stdev',]^2/2)
mnthly_ibm_pr_ratio; mnthly_ibm_pr_ratio^12;
Sec. 2.4¶
AR(3) (GDP)¶
- Quarterly GDP
# Quarterly U.S. GNP growth series, starting 1947 Q2
gnp = scan(file = "../AFTS_data/Ch02/dgnp82.txt")
gnp1 = ts(gnp, frequency = 4, start = c(1947, 2))
plot_time_fig(gnp1, main = "Gross National Product", xlab = "Year", ylab = "Growth")
- Find the AR order using AIC
# Select the AR order by AIC using exact maximum likelihood
m1 = ar(gnp, method = 'mle')
m1$order
# Refit the selected AR(3) with arima(); note 'intercept' is the mean mu
m2 = arima(gnp, order = c(3, 0, 0))
m2
- $\phi_0 = \mu*(1-\phi_1-\phi_2-\phi_3)$
cal_phi_0(m2, ord=3)
Residual standard error
sqrt(m2$sigma2)
- Characteristic equation
# Characteristic polynomial 1 - phi1*z - phi2*z^2 - phi3*z^3
?polyroot
p1 = c(1, -m2$coef[1:3])
roots = polyroot(p1)
roots
# Sanity check: the polynomial should evaluate to ~0 at each root
for (i in 1:3) {
print(i)
print(as.numeric(Mod(1-m2$coef[1]*roots[i]-m2$coef[2]*roots[i]^2-m2$coef[3]*roots[i]^3)))
}
- Modulus of the roots
roots; Mod(roots)
1/roots; Mod(1/roots)
- Compute the average length of business cycles
- Doesn't matter if using the characteristic roots or the inverse of them.
- Unit is "quarter". So average business cycle is about 10.66 quarters.
# k = 2*pi / acos(Re(root)/Mod(root)); the constants below are hard-coded,
# presumably copied from the root output above -- TODO confirm they match
# the current fit (this pair looks like the inverted complex roots)
k = 2*pi/acos(0.434406493995099/0.522654950320849)
k
# Same computation using the non-inverted complex root pair
k = 2*pi/acos(1.59025281352281/1.91330819575347)
k
AIC (GDP)¶
- Quarterly GDP
gnp = scan(file = '../AFTS_data/Ch02/dgnp82.txt')
ord = ar(gnp, method = 'mle')
format(ord$aic, digits = 6)
ord$order; ord$order.max
AR(3) example (IBM and Value-Weighted returns)¶
- Monthly IBM returns
- Monthly Value-Weighted returns
da = read.table("../AFTS_data/Ch02/m-ibm3dx2608.txt", header = T)
da[1:5,]
Parameter estimation for value-weighted index¶
- Try simple return
vwrtn_m3 = arima(da$vwrtn, order = c(3, 0, 0))
vwrtn_m3
vwrtn_ts = ts(da$vwrtn, frequency = 12, start = c(1926, 1))
plot_time_fig(vwrtn_ts, main = "Value-weighted index return", xlab = "Year", ylab = "Return")
est_mrtn = get_mu(vwrtn_m3)
(1+est_mrtn)^12-1
- Try log-return
lvwrtn_ts = ts(log(1+da$vwrtn), frequency = 12, start = c(1926, 1))
lvwrtn_m3 = arima(lvwrtn_ts, order = c(3, 0, 0))
lvwrtn_m3
plot_time_fig(lvwrtn_ts, main = "Value-weighted index log-return", xlab = "Year", ylab = "log(Return)")
est_mlrtn = get_mu(lvwrtn_m3)
exp(est_mlrtn*12)-1
Calculate the actual yearly return¶
- The actual `9.3%` per annum is close to the `11.3%` per annum implied by the model estimated on simple returns.
- If I transform the simple returns to log returns using `log(1+rtn)`, the estimated return per annum is `9.3%`, which is closer to the actual value.
vw = da$vwrtn # Value-weighted index
t1 = prod(vw+1) # Simple gross return
t1; length(vw); t1^(12/length(vw))-1; # monthly return per annum
Model checking (Value-Weighted return)¶
- Value-Weighted return
m3 = arima(da$vwrtn, order = c(3, 0, 0))
m3
- Compute the intercept $\phi_0 = \mu*(1-\phi_1-\phi_2-\phi_3)$
cal_phi_0(m3, ord=3)
- Compute the standard error of residuals
attributes(m3)
c(
sqrt(sum(m3$residuals^2)/(m3$nobs-2*length(m3$coef)-1)),
sqrt(sum(m3$residuals^2)/(m3$nobs-length(m3$coef))),
sqrt(sum(m3$residuals^2)/m3$nobs)
)
sqrt(m3$sigma2)
- Ljung-Box test (R uses 12 dof)
Box.test(m3$residuals, lag = 12, type = 'Ljung')
- Compute p-value using 9 dof (
12-3=9, the parameter for AR does not include the intercept.)
# Ljung-Box p-value with 9 dof (12 lags minus 3 fitted AR coefficients).
# Recompute from the residuals via Box.test(fitdf = 3) instead of
# hard-coding the statistic 16.35 from a previous run, so this cell stays
# correct if the fit changes.
m3_box = Box.test(m3$residuals, lag = 12, type = 'Ljung', fitdf = 3)
pv = as.numeric(m3_box$p.value)
pv
- Refine the model to rule out statistically insignificant coef by fixing the AR(2) coef to zero (pp51)
mm3 = arima(da$vwrtn, order = c(3,0,0), fixed = c(NA,0,NA,NA))
mm3
mm3_1 = arima(da$vwrtn, order = c(3,0,0), fixed = c(NA,0,NA,NA), method = "ML")
mm3_1; cal_phi_0(mm3_1, ord = 3)
mm3_2 = arima(da$vwrtn, order = c(3,0,0), fixed = c(NA,0,NA,NA), method = "CSS")
mm3_2; cal_phi_0(mm3_2, ord = 3)
mm3_3 = arima(da$vwrtn, order = c(3,0,0), fixed = c(NA,0,NA,NA), method = "CSS-ML")
mm3_3; cal_phi_0(mm3_3, ord = 3)
cal_phi_0(mm3, ord = 3)
- In the
arimasoftware, when estimating the residual variance, they use sample size $T$, instead of $T-2*p-1$.
sum(mm3$residuals^2)/(length(da$vwrtn)); mm3$sigma2; sqrt(mm3$sigma2)
box_test <- Box.test(mm3$residuals, lag = 12, type = 'Ljung')
box_test
attributes(box_test)
- Compute p-value using 10 dof (
12-2=10, the parameter for AR does not include the intercept.)
pv = as.numeric(1-pchisq(box_test$statistic, 10))
pv; box_test$statistic
# install.packages("forecast")
da = read.table("../AFTS_data/Ch02/m-ibm3dx2608.txt", header = T)
da[1:5,]
f_da_ts = ts(da$vwrtn[1:tr_n], frequency = 12, start = c(1926, 1))
ts_fm3 = arima(f_da_ts, order = c(3,0,0))
# ts_fc_res = forecast(ts_fm3, h = 12)
ts_fc_res = predict(ts_fm3, n.ahead=12)
ts_fc_res
f_da_ts = ts(da$vwrtn[1:tr_n], frequency = 12, start = c(1926, 1))
ts_fm3 = arima(f_da_ts, order = c(3,0,0))
ts_fm3$x <- f_da_ts
ts_fc_res = forecast(ts_fm3, h = 12)
summary(ts_fc_res)
cal_phi_0(ts_fm3, ord = 3)
sqrt(ts_fm3$sigma2)
# Use forecast
par(bg = "white")
xend = end(f_da_ts)[1]+end(f_da_ts)[2]/12
xmin = xend-1
xmax = xend+1
plot(ts_fc_res, xlim = c(xmin, xmax))
# Use predict
par(bg = "white")
xend = end(f_da_ts)[1]+end(f_da_ts)[2]/12
xmin = xend-1
xmax = xend+1
plot(ts_fc_res$pred, xlim = c(xmin, xmax), ylim = c(-0.3, 0.4))
attributes(ts_fc_res)
- Make multi-steps forecast plots (Fig. 2.7)
freq = 12
vwrtn_ts = ts(da$vwrtn, frequency = freq, start = c(1926, 1))
eotr=984
ts_fc_res = plot_arima_forecast_fig(
da_ts=vwrtn_ts,
eotr=eotr, h=12, npts=12, frequency=freq,
order=c(3,0,0), fixed=NULL, method=NULL, transform.pars=TRUE,
main="Forecasts from ARIMA(3,0,0) with non-zero mean for ewrtn (Fig. 2.7)",
xlab="Year", ylab="Return", ylim=c(-0.21, 0.21)
)
- Forecast values (Table 2.2)
summary(ts_fc_res)
install_redir_output(lib = "knitr", log_fname = "knitr_install.log")
vwrtn_multistep_ahead_forecast_tb <- comb_forecast_res(ts_fc_res, vwrtn_ts, eotr, freq)
vwrtn_multistep_ahead_forecast_tb
class(vwrtn_multistep_ahead_forecast_tb); attributes(vwrtn_multistep_ahead_forecast_tb)
Find Volume-weighted monthly and yearly expected return¶
- For the
log(Return)- $\mu_{monthly}\approx 0.007403$
- $\sigma_{monthly}\approx 0.05434$
# µ, sigma, monthly pr ratio, yearly pr ratio
vwrtn_basic_stats = apply(as.data.frame(lvwrtn_ts), 2, basicStats)
vwrtn_basic_stats$x['Mean',];
vwrtn_basic_stats$x['Stdev',];
mnthly_vw_pr_ratio = exp(vwrtn_basic_stats$x['Mean',]+vwrtn_basic_stats$x['Stdev',]^2/2);
mnthly_vw_pr_ratio; mnthly_vw_pr_ratio^12;
MA(9) fitting with fixed coefs example (Sec. 2.5.3) (IBM)¶
- Monthly IBM returns
da = read.table("../AFTS_data/Ch02/m-ibm3dx2608.txt", header = T)
da[1:5,]
ewrtn_ts = ts(da$ewrtn, frequency = 12, start = c(1926, 1))
plot_time_fig(ts = ewrtn_ts, main = "Equal-weighted index return (Fig. 2.8)", xlab = "Year", ylab = "Simple Return")
par(bg = "white")
acf(da$ewrtn, main = "ACF: Equal-weighted index return (Fig. 2.8)")
find("arima")
- Fit
MA(9)process
ewrtn_ma_mod = arima(
da$ewrtn,
order = c(0,0,9),
fixed = c(NA, 0, NA, 0, 0, 0, 0, 0, NA, NA), # Include the mean
transform.pars = F # Doesn't make difference
)
ewrtn_ma_mod; sqrt(ewrtn_ma_mod$sigma2)
ewrtn_ma_mod_1 = arima(
da$ewrtn,
order = c(0,0,9),
fixed = c(NA, 0, NA, 0, 0, 0, 0, 0, NA, NA),
method = 'ML',
transform.pars = F # Doesn't make difference
)
ewrtn_ma_mod_1; sqrt(ewrtn_ma_mod_1$sigma2)
ewrtn_ma_mod_2 = arima(
da$ewrtn,
order = c(0,0,9),
fixed = c(NA, 0, NA, 0, 0, 0, 0, 0, NA, NA),
method = 'CSS',
transform.pars = F # Doesn't make difference
)
ewrtn_ma_mod_2; sqrt(ewrtn_ma_mod_2$sigma2)
ewrtn_ma_mod_3 = arima(
da$ewrtn,
order = c(0,0,9),
fixed = c(NA, 0, NA, 0, 0, 0, 0, 0, NA, NA),
method = 'CSS-ML',
transform.pars = F # Doesn't make difference
)
ewrtn_ma_mod_3; sqrt(ewrtn_ma_mod_3$sigma2)
- Box-Ljung test (use 12 dof)
box_test_res = Box.test(ewrtn_ma_mod$residuals, lag = 12, type = 'Ljung');
box_test_res; 1-pchisq(box_test_res$statistic, df = 9); 1-pchisq(box_test_res$statistic, df = 12)
box_test_res_1 = Box.test(ewrtn_ma_mod_1$residuals, lag = 12, type = 'Ljung');
box_test_res_1; 1-pchisq(box_test_res_1$statistic, df = 9); 1-pchisq(box_test_res_1$statistic, df = 12)
box_test_res_2 = Box.test(ewrtn_ma_mod_2$residuals, lag = 12, type = 'Ljung');
box_test_res_2; 1-pchisq(box_test_res_2$statistic, df = 9); 1-pchisq(box_test_res_2$statistic, df = 12)
box_test_res_3 = Box.test(ewrtn_ma_mod_3$residuals, lag = 12, type = 'Ljung');
box_test_res_3; 1-pchisq(box_test_res_3$statistic, df = 9); 1-pchisq(box_test_res_3$statistic, df = 12)
MA(9) forecasting (Sec. 2.5.4) (Equal-Weighted returns)¶
- Monthly Equal-Weighted returns
- Plot the forecasting
eotr = 986
h = 10
npts = 12
freq = 12
order = c(0,0,9)
fixed = c(NA, 0, NA, 0, 0, 0, 0, 0, NA, NA)
seasonal = NULL
ewrtn_fc_res = plot_arima_forecast_fig(
da_ts=ewrtn_ts, eotr=eotr, h=h, npts=npts, frequency=freq,
order=order, seasonal=seasonal, fixed=fixed, method='ML', transform.pars=F,
main="Forecasts from ARIMA(0,0,9) with non-zero mean for ewrtn (Fig. 2.7)",
xlab="Year", ylab="Return", ylim=c(-0.25, 0.25)
)
- Forecasting results
ewrtn_multistep_ahead_forecast_tb = comb_forecast_res(
forecast_obj=ewrtn_fc_res,
da_ts=ewrtn_ts,
eotr=eotr,
freq=freq
)
ewrtn_multistep_ahead_forecast_tb
Find Equal-weighted monthly and yearly expected return¶
- Monthly and yearly Equal-Weighted returns
# µ, sigma, monthly pr ratio, yearly pr ratio
lewrtn_ts = ts(log(1+ewrtn_ts), frequency = 12, start = c(1926, 1))
ewrtn_basic_stats = apply(as.data.frame(lewrtn_ts), 2, basicStats)
ewrtn_basic_stats$x['Mean',];
ewrtn_basic_stats$x['Stdev',];
mnthly_ew_pr_ratio = exp(ewrtn_basic_stats$x['Mean',]+ewrtn_basic_stats$x['Stdev',]^2/2);
mnthly_ew_pr_ratio; mnthly_ew_pr_ratio^12;
Sec. 2.6¶
3M log price (Sec. 2.6.3)¶
da = read.table("../AFTS_data/Ch02/m-3m4608.txt", header = T)
da[1:5,]
lrtn = log(1+da$rtn)
lrtn_ts = ts(lrtn, frequency = 12, start = c(1946, 2))
plot_time_fig(lrtn_ts, main = "3M stock (Fig. 2.9)", xlab = "Year", ylab = "log(Return)")
par(bg = "white")
acf(lrtn_ts, main = "3M stock (Fig. 2.9)")
- Print EACF results (Table 2.5)
eacf: link
find("eacf")
eacf_res <- eacf(lrtn_ts, ar.max = 6, ma.max = 12)
eacf_stats_tb = format(as.data.frame(eacf_res$eacf), digits = 3)
names(eacf_stats_tb) <- seq(from = 0, to = 12)
display(eacf_stats_tb)
display(eacf_res$symbol)
display(2/sqrt(length(lrtn_ts)))
Find 3M monthly and yearly expected return¶
- For the
log(Return)- $\mu_{monthly}\approx 0.010299$
- $\sigma_{monthly}\approx 0.063719$
basic_3m_stats <- apply(as.data.frame(lrtn), 2, basicStats)
basic_3m_stats
# µ, std, and std of mean
basic_3m_stats$lrtn['Mean',]; basic_3m_stats$lrtn['Stdev',]; basic_3m_stats$lrtn['Stdev',]/sqrt(basic_3m_stats$lrtn['nobs',])
# 1 + Monthly simple grow rate
mnthly_3m_pr_ratio = exp(basic_3m_stats$lrtn['Mean',]+basic_3m_stats$lrtn['Stdev',]^2/2)
mnthly_3m_pr_ratio; mnthly_3m_pr_ratio^12
Sec. 2.7¶
3M log price¶
da = read.table("../AFTS_data/Ch02/m-3m4608.txt", header = T)
da[1:5,]
- See the t-statistics and p-value of mean of log returns
lrtn = log(1+da$rtn)
lrtn_ts = ts(lrtn, frequency = 12, start = c(1946, 2))
t_stats = mean(lrtn)/(sd(lrtn)/sqrt(length(lrtn)))
pv = 2*(1-pnorm(abs(t_stats)))
mean(lrtn);
sd(lrtn);
sd(lrtn)/sqrt(length(lrtn)); # variance of the mean of the log returns
t_stats;
pv
- Construct two price series
- Using log return
- Using mean-corrected log return
# Construct two log-price series from the log returns and compare them
# with the deterministic drift line mu*t (Fig. 2.10).
lgPr1 = cumsum(lrtn)            # log price from raw log returns
lgPr2 = cumsum(lrtn-mean(lrtn)) # log price from mean-corrected log returns
# NOTE(review): lrtn_ts above starts at c(1946, 2) but these series are
# dated from c(1946, 1) -- confirm the intended start period.
lgPr1_ts = ts(lgPr1, frequency = 12, start = c(1946, 1))
lgPr2_ts = ts(lgPr2, frequency = 12, start = c(1946, 1))
# Deterministic drift line y = mean(lrtn) * t
ln_ts = ts(mean(lrtn)*seq(from = 1, to = length(lrtn), by = 1), frequency = 12, start = c(1946, 1))
par(bg = "white")
plot(ln_ts, col = 'black', type = 'l', main = '3M constructed log-pr series (Fig. 2.10)', xlab = "Year", ylab = "log(Price)")
# plot(lgPr1_ts, type = 'p', pch = 1, main = '3M constructed log-pr series', xlab = "Year", ylab = "Log Price")
points(lgPr1_ts, col = 'red', pch = 1)
points(lgPr2_ts, col = 'blue', pch = 3)
# Add legend (fixed label typo: "mean(t_t)" -> "mean(r_t)")
legend(
"topleft",
legend = c("y=0.0103*t", "sum(r_t)", "sum(r_t-mean(r_t))"),
col = c("black", "red", "blue"),
lty = c(1, NA, NA),
pch = c(NA, 1, 3)
)
- See some basic stats for 3M return
- If we model the price series as a Geometric Brownian Motion, then the return series, i.e.
diff(log(Price)), (with the constant time-step) should be sum of a constant mean and an iid Gaussian noise. - The Geometric Brownian Motion can be seen as an unit-root process in time-series.
- If we model the price series as a Geometric Brownian Motion, then the return series, i.e.
pr_ts = ts(exp(lgPr1), frequency = 12, start = c(1946, 1))
plot_time_fig(ts = pr_ts, main = "Price", xlab = "Year", ylab = "Price/Price_0")
Unit-root test (U.S. quarterly GDP)¶
da = read.table("../AFTS_data/Ch02/q-gdp4708.txt", header = T)
da[1:5,]
lgdp = log(da$gdp)
m1 = ar(diff(lgdp), method = 'mle')
m1$order
adfTest(lgdp, lags = 10, type = c("c"))
apply(as.data.frame(lgdp), 2, basicStats)
lgdp_ts = ts(lgdp, frequency = 4, start = c(1947 ,1))
plot_unit_root_figs(
lgdp_ts, freq = 4, start = c(1947, 2), lag_max = 20,
main = 'U.S. quarterly GDP (Fig. 2.11)', xlab = "Year", ylab = "ln(GDP)"
)
Full ARIMA workflow (S&P 500)¶
- Unit-root testing
- Order determination and parameter estimation
- Box-Ljung test
- Plot forecasting
# install.packages("RQuantLib")
load_quantlib_calendars(ql_calendars = NULL, from = 1950, to = 2008, financial = TRUE)
da = read.table("../AFTS_data/Ch02/d-sp55008.txt", header = T)
da[1:10,]; dim(da)
Unit-root testing¶
# Unit-root testing on log S&P 500 closing prices
lsp5 = log(da$close)
par(bg = "white")
# NOTE(review): ts() start= uses at most (year, period); the third element
# of c(1950, 1, 3) is ignored -- confirm the intended start date.
lsp5_ts = ts(lsp5, frequency = 252, start = c(1950 , 1, 3))
plot(lsp5_ts, col = 'black', type = 'l', main = 'S&P 500 (Fig. 2.12)', xlab = "Year", ylab = "log(S&P 500)")
# AR order of the differenced series, used to set the ADF lag below
m2 = ar(diff(lsp5), method = 'mle')
m2$order
# ADF test with constant and time trend ("ct")
adfTest(lsp5, lags = 2, type = ("ct"))
# Rule-of-thumb upper bound for the lag choice
log(length(lsp5))
adfTest(lsp5, lags = 15, type = ("ct"))
Order determination and parameter estimations¶
- With one-differencing
diff_lsp5_ts = ts(diff(lsp5), frequency = 252, start = c(1950 , 1, 4))
length(lsp5); length(diff_lsp5_ts);
display(head(lsp5, 10))
display(head(diff_lsp5_ts, 10))
plot_unit_root_figs(
lsp5_ts, freq = 252, start = c(1950 , 1, 4), lag_max = 20,
main = 'S&P500 Index', xlab = "Year", ylab = "ln(S&P500)"
)
diff_lsp5_ts = ts(diff(lsp5), frequency = 252, start = c(1950 , 1, 4))
par(bg = 'white')
pacf(diff_lsp5_ts)
par(bg = 'white')
acf(diff_lsp5_ts)
sp5_eacf_res <- eacf(diff_lsp5_ts, ar.max = 6, ma.max = 12)
sp5_eacf_stats_tb = format(as.data.frame(sp5_eacf_res$eacf), digits = 3)
names(sp5_eacf_stats_tb) <- seq(from = 0, to = 12)
display(sp5_eacf_stats_tb)
display(sp5_eacf_res$symbol)
# pp67, asymptotic standard error of EACF
display(2/sqrt(length(diff_lsp5_ts)))
diff_sp5_mod = arima(diff_lsp5_ts, order = c(1,0,2), include.mean = F)
diff_sp5_mod
sp5_mod = arima(lsp5_ts, order = c(1,1,2), include.mean = T)
sp5_mod
Box-Ljung test¶
- pp33 very briefly talks about how to choose lag in the
Box.test - Null hypothesis cannot be reject, so that the model is adequte.
diff_sp5_box_test = Box.test(diff_sp5_mod$residuals, lag = 5, type = 'Ljung')
diff_sp5_box_test
diff_sp5_box_test_1 = Box.test(diff_sp5_mod$residuals, lag = 12, type = 'Ljung')
diff_sp5_box_test_1
sp5_box_test = Box.test(sp5_mod$residuals, lag = 5, type = 'Ljung')
sp5_box_test
sp5_box_test_1 = Box.test(sp5_mod$residuals, lag = 12, type = 'Ljung')
sp5_box_test_1
Plot forecasting¶
# Number of observations used in the fitted model
diff_sp5_mod$nobs
# Hold out the last 22 trading days (~ one month) for forecasting
npts = 22
eotr = 14661-npts
h = npts
freq = 252
order_1 = c(1,0,2)
fixed = NULL
seasonal = NULL
# Forecast the differenced log-price (return) series with ARMA(1,2)
diff_sp5_fc_res = plot_arima_forecast_fig(
da_ts=diff_lsp5_ts, eotr=eotr, h=h, npts=npts, frequency=freq,
order=order_1, seasonal=seasonal, fixed=fixed, method='ML', transform.pars=TRUE,
main="Forecasts from ARIMA(1,0,2) with\n non-zero mean for ln(Return) of s&p500",
xlab="Year", ylab="ln(Return)", ylim=c(-0.05, 0.05)
)
# Forecast the log-price series directly with ARIMA(1,1,2)
order_2 = c(1,1,2)
sp5_fc_res = plot_arima_forecast_fig(
da_ts=lsp5_ts, eotr=eotr, h=h, npts=npts, frequency=freq,
order=order_2, seasonal=seasonal, fixed=fixed, method='ML', transform.pars=TRUE,
main="Forecasts from ARIMA(1,1,2) with\n non-zero mean for ln(Price) of s&p500",
xlab="Year", ylab="ln(Return)", ylim=c(7, 7.3)
)
Find S&P500 monthly and yearly expected return¶
- We can think of
- For the
log(Return)- $\mu_{daily}\approx 0.000299$
- $\sigma_{daily}\approx 0.009011$
# µ, std, and std of mean; daily, monthly, yearly pr-ratio
sp5_basic_stats <- apply(as.data.frame(diff(lsp5)), 2, basicStats)
display(sp5_basic_stats)
daily_exp_pr_ratio = exp(sp5_basic_stats$`diff(lsp5)`['Mean',]+sp5_basic_stats$`diff(lsp5)`['Stdev',]^2/2)
mnthly_exp_pr_ratio = daily_exp_pr_ratio^21
yrly_exp_pr_ratio = daily_exp_pr_ratio^252
sp5_basic_stats$`diff(lsp5)`['Mean',];
sp5_basic_stats$`diff(lsp5)`['Stdev',];
daily_exp_pr_ratio; mnthly_exp_pr_ratio; yrly_exp_pr_ratio;
Sec. 2.8¶
J&J quarterly earnings¶
- Starting at 1960 first quarter.
da = read.table("../AFTS_data/Ch02/q-jnj.txt", header = F) # needs to set `header = F`
head(da); length(da$V1)
Plot the acf of JnJ quarterly earnings¶
freq = 4
jnj_er = da$V1
jnj_er_ts = ts(jnj_er, frequency = freq, start = c(1960, 1))
jnj_lg_er = log(jnj_er)
jnj_lg_er_ts = ts(jnj_lg_er, frequency = freq, start = c(1960, 1))
jnj_lg_er_ts
plot_time_fig(jnj_er_ts, main = "JnJ quarterly earnings (Fig. 2.13 (a))", xlab = "Year", ylab = "Earning")
plot_time_fig(jnj_lg_er_ts, main = "JnJ quarterly earnings (Fig. 2.13 (b))", xlab = "Year", ylab = "log(Earning)")
plot_pacf_acf(jnj_lg_er, main = "lg(earning) (Fig. 2.14 (a))")
plot_pacf_acf(diff(jnj_lg_er), main = "diff(lg(earning)) (Fig. 2.14 (b))")
plot_pacf_acf(diff(jnj_lg_er, lag = freq), main = "diff(lg(earning), 4) (Fig. 2.14 (c))")
plot_pacf_acf(diff(diff(jnj_lg_er), lag = freq), main = "diff(diff(lg(earning)), 4) (Fig. 2.14 (d))")
plot_pacf_acf(diff(diff(jnj_lg_er, lag = freq)), main = "diff(diff(lg(earning), 4)) (Fig. 2.14 (d)\')")
Apply airline model to the JnJ data.¶
find('arima')
multi_seas_mod_1 <- arima(
x = diff(diff(jnj_lg_er_ts), lag = freq),
order = c(0,0,1),
seasonal = list(order = c(0,0,1), period = freq),
method = 'ML',
include.mean = F
)
summary(multi_seas_mod_1); sqrt(multi_seas_mod_1$sigma2)
multi_seas_mod_2 <- arima(
x = diff(diff(jnj_lg_er_ts), lag = freq),
order = c(0,0,1),
seasonal = list(order = c(0,0,1), period = freq),
method = 'CSS-ML',
include.mean = F
)
summary(multi_seas_mod_2); sqrt(multi_seas_mod_2$sigma2)
multi_seas_mod_3 <- arima(
x = diff(diff(jnj_lg_er_ts), lag = freq),
order = c(0,0,1),
seasonal = list(order = c(0,0,1), period = freq),
method = 'CSS',
include.mean = F
)
summary(multi_seas_mod_3); sqrt(multi_seas_mod_3$sigma2)
# multi_seas_mod_4 <- arima(
# x = diff(diff(jnj_lg_er_ts), lag = freq),
# order = c(1,0,1),
# seasonal = list(order = c(1,0,1), period = freq),
# method = 'ML',
# include.mean = F,
# transform.pars = F
# )
# # multi_seas_mod_4 <- arima(
# # x = diff(diff(jnj_lg_er_ts), lag = freq),
# # order = c(1,0,1),
# # seasonal = list(order = c(1,0,1), period = freq),
# # fixed = c(1, NA, 1, NA),
# # method = 'ML',
# # include.mean = F,
# # transform.pars = F
# # )
# summary(multi_seas_mod_4); sqrt(multi_seas_mod_4$sigma2)
Box-Ljung test¶
- Cannot reject the null-hypothesis. So the model appears to be adequate.
box_test <- Box.test(multi_seas_mod_1$residuals, lag = 12, type = 'Ljung')
box_test
Forecast¶
- (0):
$$ \begin{eqnarray} z_t &=& (1-B)(1-B^4)x_t = (1-\theta B)(1-\beta B^4)a_t = a_t-\theta a_{t-1}-\beta a_{t-4}+\theta \beta a_{t-5} \\ y_t &=& (1-B^4)x_t \\ z_t &=& (1-B)y_t \end{eqnarray} $$
$$ \begin{eqnarray} y_{t+1}&=&z_{t+1}+y_t \\ x_{t+4}&=&y_{t+4}+x_t \end{eqnarray} $$
or
$$ \begin{eqnarray} y_{t}&=&z_{t}+y_{t-1} \\ x_{t}&=&y_{t}+x_{t-4} \end{eqnarray} $$
The expectation is
$$ \begin{eqnarray} E[y_{t+1}]&=&E[z_{t+1}]+E[y_t] \\ E[x_{t+4}]&=&E[y_{t+4}]+E[x_t] \end{eqnarray} $$
- (1) (following are stationary results, but for forecasting needs to consider the time-zero):
$$ \begin{eqnarray} Var[z_t]&=&(1+\theta^2+\beta^2+\theta^2\beta^2)\sigma^2 \\ Cov(z_{t+1},z_t)&=&-\theta(1+\beta^2)\sigma^2 \\ Cov(z_{t+2},z_t)&=&0 \\ Cov(z_{t+3},z_t)&=&\theta\beta\sigma^2 \\ Cov(z_{t+4},z_t)&=&-\beta(1+\theta^2)\sigma^2 \\ Cov(z_{t+5},z_t)&=&\theta\beta\sigma^2 \\ Cov(z_{t+i},z_t)&=&0, \forall i>5 \end{eqnarray} $$
- (2):
$$ \begin{eqnarray} Var[y_t]&=&Var[z_t]+Var[y_{t-1}]+Cov(z_t, y_{t-1}) \\ Cov(y_{t+1}, y_t)&=&Cov(z_{t+1}, y_t)+Var[y_t]=Cov(z_{t+1},z_t+z_{t-1}+z_{t-2}+z_{t-3}+z_{t-4})+Var[y_t] \\ Cov(y_{t+2}, y_t)&=&Cov(z_{t+2}, y_t)+Cov(y_{t+1}, y_t)=Cov(z_{t+2},z_t+z_{t-1}+z_{t-2}+z_{t-3})+Cov(y_{t+1}, y_t) \\ Cov(y_{t+3}, y_t)&=&Cov(z_{t+3}, y_t)+Cov(y_{t+2}, y_t)=Cov(z_{t+3},z_t+z_{t-1}+z_{t-2})+Cov(y_{t+2}, y_t) \\ Cov(y_{t+4}, y_t)&=&Cov(z_{t+4}, y_t)+Cov(y_{t+3}, y_t)=Cov(z_{t+4},z_t+z_{t-1})+Cov(y_{t+3}, y_t) \\ Cov(y_{t+5}, y_t)&=&Cov(z_{t+5}, y_t)+Cov(y_{t+4}, y_t)=Cov(z_{t+5},z_t)+Cov(y_{t+4}, y_t) \\ Cov(y_{t+i}, y_t)&=&Cov(y_{t+5}, y_t), \forall i>5 \end{eqnarray} $$
- (3):
$$ \begin{eqnarray} Var[x_t]&=&Var[y_t]+Var[x_{t-4}]+Cov(y_t,y_{t-4}+y_{t-8}+\cdots) \end{eqnarray} $$
# plot_arima_forecast_fig <- function(
# da_ts, eotr, h, npts, frequency,
# order, seasonal, fixed, method, include.mean, transform.pars,
# main, xlab, ylab, ylim = NULL) {
# par(bg = "white")
# # arima model
# tr_da_ts <- ts(da_ts[1:eotr], frequency = frequency, start = start(da_ts))
# if (is.null(transform.pars)) {
# ts_fm3 <- arima(tr_da_ts, order = order, fixed = fixed, seasonal = seasonal, method = method, include.mean = include.mean)
# } else {
# ts_fm3 <- arima(tr_da_ts, order = order, fixed = fixed, seasonal = seasonal, method = method, include.mean = include.mean, transform.pars = transform.pars)
# }
# print(ts_fm3$nobs)
# # Forecast
# ts_fm3$x <- tr_da_ts # https://stackoverflow.com/a/42464130/4307919
# ts_fc_res <- forecast(ts_fm3, h = h)
# # Plot forecast
# if (is.null(npts)) {
# npts <- eotr
# }
# xmin <- time(da_ts)[eotr] - npts / frequency
# xmax <- time(da_ts)[eotr] + (max(h, length(da_ts) - eotr) + 1) / frequency
# cat(xmin, ";", xmax)
# plot(ts_fc_res, xlim = c(xmin, xmax), ylim = ylim, main = main, xlab = xlab, ylab = ylab)
# # Plot forecast mean (prepend the last observed data in the training dataset)
# dummy_1st_fmean_ts <- ts(c(c(da_ts[eotr]), as.numeric(ts_fc_res$mean)), frequency = frequency, start = end(tr_da_ts))
# lines(dummy_1st_fmean_ts)
# points(dummy_1st_fmean_ts, pch = 1)
# # Plot confidence interval (95%)
# dummy_1st_flower_ts <- ts(c(c(da_ts[eotr]), as.numeric(ts_fc_res$lower[, 2])), frequency = frequency, start = end(tr_da_ts))
# dummy_1st_fupper_ts <- ts(c(c(da_ts[eotr]), as.numeric(ts_fc_res$upper[, 2])), frequency = frequency, start = end(tr_da_ts))
# lines(dummy_1st_flower_ts, lty = 2)
# lines(dummy_1st_fupper_ts, lty = 2)
# # Plot original data
# orig_plot_ts <- ts(da_ts[(eotr - npts + 1):length(da_ts)], frequency = frequency, start = time(da_ts)[eotr] - (npts - 1) / frequency)
# lines(orig_plot_ts)
# points(orig_plot_ts, pch = 19)
# ts_fc_res
# }
# comb_forecast_res <- function(forecast_obj, da_ts, eotr, freq) {
# display(summary(forecast_obj))
# fc_std_err = (forecast_obj$upper[,2]-forecast_obj$lower[,2])/2/qnorm(p = 0.975)
# actual_ts = ts(da_ts[(eotr+1):length(da_ts)], frequency = freq, start = time(da_ts)[eotr+1])
# display(forecast_obj$mean); display(fc_std_err); display(actual_ts)
# multistep_ahead_forecast_tb = cbind(forecast_obj$mean, fc_std_err, actual_ts)
# dimnames(multistep_ahead_forecast_tb)[[2]] <- c("Forecast", "Std. Error", "Actual")
# multistep_ahead_forecast_tb
# }
# Forecast the doubly-differenced log earnings with the airline model,
# holding out the last h = 8 quarters.
npts = 6
h = 8
eotr = 79-h
freq = 4
order = c(0,0,1)
seasonal = list(order = order, period = freq)
fixed = NULL
include.mean = F
# z_t = (1-B)(1-B^4) ln(earnings); first usable observation is 1961 Q2
diff_seasonal_reg_jnj_ts = ts(diff(diff(jnj_lg_er_ts), lag = freq), frequency = freq, start = c(1961, 2))
diff_seasonal_reg_jnj_fc_res = plot_arima_forecast_fig(
da_ts=diff_seasonal_reg_jnj_ts, eotr=eotr, h=h, npts=npts, frequency=freq,
order=order, seasonal=seasonal, fixed=fixed, method='ML', include.mean=include.mean, transform.pars=TRUE,
main="Forecasts from ARIMA(0,0,1)(0,0,1)[4] with\n non-zero mean for (1-B)(1-B^4)ln(QtlyEarning) of JnJ",
xlab="Year", ylab="(1-B)(1-B^4)ln(QtlyEarning)"#, ylim=c(-0.05, 0.05)
)
summary(diff_seasonal_reg_jnj_fc_res)
# Combine forecast means, std. errors, and actuals into one table
jnj_multistep_ahead_forecast_tb <- comb_forecast_res(diff_seasonal_reg_jnj_fc_res, diff_seasonal_reg_jnj_ts, eotr, freq)
jnj_multistep_ahead_forecast_tb
- Use DP to calculate forecasting expectation and variance. $$ \begin{eqnarray} E[y_{t+1}]&=&E[z_{t+1}]+E[y_t] \\ E[x_{t+4}]&=&E[y_{t+4}]+E[x_t] \end{eqnarray} $$
# Assume freq=s>=4
# z_t = (1-B)(1-B^4) ln(earnings)
diff_seasonal_reg_jnj_ts = ts(diff(diff(jnj_lg_er_ts), lag = freq), frequency = freq, start = c(1961, 2))
# y_t = (1-B^4) ln(earnings)
diff_seasonal_jnj_ts = ts(diff(jnj_lg_er_ts, lag = freq), frequency = freq, start = c(1961, 1))
fstart = c(1979, 1)
# Indices of the last in-sample z, y, x values -- defined but apparently
# unused below; TODO confirm they can be removed
iz = 71
iy = 72
ix = 76
h = 8
# E[y_t]: accumulate the forecast means through y_t = z_t + y_{t-1},
# seeded with the last in-sample y value
e_y <- list()
for (i in 1:h) {
if (i==1) {
e_y[[paste(i)]] = diff_seasonal_reg_jnj_fc_res$mean[i] + diff_seasonal_jnj_ts[length(diff_seasonal_jnj_ts)-h]
} else {
e_y[[paste(i)]] = diff_seasonal_reg_jnj_fc_res$mean[i] + e_y[[paste(i-1)]]
}
}
# E[x_t]: accumulate through x_t = y_t + x_{t-freq}, seeded with the last
# freq in-sample x values
e_x <- list()
for (i in 1:h) {
if (i<=freq) {
e_x[[paste(i)]] = e_y[[paste(i)]] + jnj_lg_er_ts[length(jnj_lg_er_ts)-h+i-freq]
} else {
e_x[[paste(i)]] = e_y[[paste(i)]] + e_x[[paste(i-freq)]]
}
}
# R's arima() writes the MA polynomial with + signs, so the text's
# theta and beta in (1-theta*B)(1-beta*B^4) are the negated coefficients
theta = -as.numeric(diff_seasonal_reg_jnj_fc_res$model$coef[1])
beta = -as.numeric(diff_seasonal_reg_jnj_fc_res$model$coef[2])
sigma2 = as.numeric(diff_seasonal_reg_jnj_fc_res$model$sigma2)
# Build the "(i,j)" string key used to index the covariance lists below.
gen_key <- function(i, j) {
  paste0("(", i, ",", j, ")")
}
# V[z_t]: conditional (co)variances of the forecast errors of z_t, keyed
# "(i,j)" for Cov(z_{t+i}, z_{t+i-j}) given data up to time t. A term is
# included only when its shock a lies in the future (the i-lag>0 guards),
# which is what makes these conditional rather than the unconditional
# autocovariances commented out below.
# The following are unconditional, we need to calculate conditional
# v_z <- list(
# "0"=as.numeric((1+theta^2+beta^2+theta^2*beta^2)*sigma2),
# "1"=as.numeric(-theta*(1+beta^2)*sigma2),
# "2"=0,
# "3"=as.numeric(theta*beta*sigma2),
# "4"=as.numeric(-beta*(1+theta^2)*sigma2),
# "5"=as.numeric(theta*beta*sigma2)
# )
v_z <- list()
for (i in 1:h) {
# Non-negative j: only lags 0, 1, freq-1, freq, freq+1 are non-zero for
# the MA(1)x(1)_freq error structure
for (j in 0:h) {
v_z[[gen_key(i,j)]] = 0
if (j==0) {
v_z[[gen_key(i,j)]] = v_z[[gen_key(i,j)]] + sigma2
if (i-1>0) {v_z[[gen_key(i,j)]] = v_z[[gen_key(i,j)]] + theta^2*sigma2}
if (i-freq>0) {v_z[[gen_key(i,j)]] = v_z[[gen_key(i,j)]] + beta^2*sigma2}
if (i-1-freq>0) {v_z[[gen_key(i,j)]] = v_z[[gen_key(i,j)]] + theta^2*beta^2*sigma2}
} else if (j==1) {
v_z[[gen_key(i,j)]] = -theta*sigma2
if (i-freq>0) {v_z[[gen_key(i,j)]] = v_z[[gen_key(i,j)]] - theta*beta^2*sigma2}
} else if (j==freq-1) {
if (i-1>0) {v_z[[gen_key(i,j)]] = theta*beta*sigma2}
} else if (j==freq) {
v_z[[gen_key(i,j)]] = -beta*sigma2
if (i-1>0) {v_z[[gen_key(i,j)]] = v_z[[gen_key(i,j)]] - beta*theta^2*sigma2}
} else if (j==freq+1) {v_z[[gen_key(i,j)]] = theta*beta*sigma2}
else {v_z[[gen_key(i,j)]] = 0}
}
# Negative j: covariances with later terms, Cov(z_{t+i}, z_{t+i+j}),
# again keeping only contributions from future shocks
for (j in 1:h) {
v_z[[gen_key(i,-j)]] = 0
if (j==1) {
if (i-1>0) {v_z[[gen_key(i,-j)]] = v_z[[gen_key(i,-j)]] - theta*sigma2}
if (i-1-freq>0) {v_z[[gen_key(i,-j)]] = v_z[[gen_key(i,-j)]] - theta*beta^2*sigma2}
} else if (j==freq-1) {
if (i-freq>0) {v_z[[gen_key(i,-j)]] = v_z[[gen_key(i,-j)]] + theta*beta*sigma2}
} else if (j==freq) {
if (i-freq>0) {v_z[[gen_key(i,-j)]] = v_z[[gen_key(i,-j)]] - beta*sigma2}
if (i-1-freq>0) {v_z[[gen_key(i,-j)]] = v_z[[gen_key(i,-j)]] - beta*theta^2*sigma2}
} else if (j==freq+1) {
if (i-1-freq>0) {v_z[[gen_key(i,-j)]] = v_z[[gen_key(i,-j)]] + theta*beta*sigma2}
}
}
}
# V[y_t]
# Unconditional autocovariances of the MA part at lags 0, 1, freq (=4)
# and freq+1 (=5); all other lags are zero for this model.
v_tau <- list(
"0"=sigma2,
"1"=-theta*sigma2,
"4"=-beta*sigma2,
"5"=theta*beta*sigma2
)
# Forecast-error (co)variances of y after undoing the regular difference:
# V[y_{T+i}] accumulates V[z] recursively. Key "(i,j)": step i, lag j.
v_y <- list()
for (i in 1:h) {
v_y[[gen_key(i,0)]] = v_z[[gen_key(i,0)]]
if (i-1>0) {v_y[[gen_key(i,0)]] = v_y[[gen_key(i,0)]] + v_y[[gen_key(i-1,0)]] - theta*v_tau[["0"]]}
if (i-freq>0) {v_y[[gen_key(i,0)]] = v_y[[gen_key(i,0)]] - beta*(v_tau[["0"]]+v_tau[["1"]])}
if (i-1-freq>0) {v_y[[gen_key(i,0)]] = v_y[[gen_key(i,0)]] + theta*beta*(v_tau[["0"]]+v_tau[["1"]]+v_tau[["4"]])}
# Lag-j covariances: start from the lag-(j-1) value and add the remaining
# cross terms of z (only lags up to freq+1 contribute).
for (j in 1:h) {
v_y[[gen_key(i,j)]] = v_y[[gen_key(i,j-1)]]
if (j<=freq+1) {
for (k in j:(freq+1)) {
if (i+j-k>0) {v_y[[gen_key(i,j)]] = v_y[[gen_key(i,j)]] + v_z[[gen_key(i+j-k,k)]]}
else {v_y[[gen_key(i,j)]] = v_y[[gen_key(i,j)]] + v_z[[gen_key(i+j,-k)]]}
}
}
}
}
# V[x_t]
# Forecast-error variances of x after undoing the seasonal difference:
# V[x_{T+i}] = V[y_{T+i}] + V[x_{T+i-freq}] plus v_y cross terms at the
# seasonal lags j = freq, 2*freq, ... that fit below step i.
v_x <- list()
for (i in 1:h) {
v_x[[paste(i)]] = v_y[[gen_key(i,0)]]
if (i>freq) {v_x[[paste(i)]] = v_x[[paste(i)]] + v_x[[paste(i-freq)]]}
j = freq
# Walk j over multiples of freq strictly below i, adding the covariance
# between y at step i-j and lag j.
while (TRUE) {
if (i>j) {
v_x[[paste(i)]] = v_x[[paste(i)]] + v_y[[gen_key(i-j,j)]]
j = j + freq
}
else {break}
}
}
# Show the hand-computed forecast means on the y and x scales.
e_y; e_x
# My hand-computed std. errors match the std. errors from the package.
for (step in seq_len(h)) {
  cat(step, ":", sqrt(v_z[[gen_key(step, 0)]]), "\n")
}
# Package forecast table and the intermediate variance lists, for comparison.
jnj_multistep_ahead_forecast_tb
v_z
v_y
v_x
# Fitted MA coefficients and innovation variance.
theta; beta; sigma2
- Plot the forecasts
- The resulting plot is somewhat different from the textbook Fig. 2.15.
# Back-transform the log-scale forecasts to the earnings scale.
# For a log-normal forecast, E[exp(X)] = exp(E[X] + V[X]/2); exp(E[X])
# alone omits the variance correction.
# Preallocate instead of growing with c() inside the loop (idiomatic R;
# final values are identical).
e_qer_no_var_arr <- numeric(h) # exp(mean), no variance adjustment
e_lg_qer_arr <- numeric(h)     # forecast mean on the log scale
e_qer_arr <- numeric(h)        # log-normal mean (variance-adjusted)
v_qer_arr <- numeric(h)        # forecast variance on the log scale
for (i in 1:h) {
  e_qer_no_var_arr[i] <- exp(e_x[[paste(i)]])
  e_lg_qer_arr[i] <- e_x[[paste(i)]]
  e_qer_arr[i] <- exp(e_x[[paste(i)]] + v_x[[paste(i)]]/2)
  v_qer_arr[i] <- v_x[[paste(i)]]
  cat(i, ":",
  exp(e_x[[paste(i)]]), exp(e_x[[paste(i)]]+v_x[[paste(i)]]/2),
  e_x[[paste(i)]], v_x[[paste(i)]], "\n",
  sep = ","
  )
}
# Leftover interactive lookup of the legend() help page.
help(legend)
par(bg = "white")
# Plot actual earnings together with the hand-computed forecast mean and
# 95% interval, back-transformed from the log scale.
da_ts = jnj_er_ts
lg_da_ts = jnj_lg_er_ts
eotr = length(da_ts)-h
tr_da_ts <- ts(da_ts[1:eotr], frequency = freq, start = start(da_ts))
# x-range: npts in-sample points plus h forecast points around the end of
# the training sample (npts is assumed set earlier in the notebook — TODO confirm).
xmin <- time(da_ts)[eotr] - (npts+1) / freq
xmax <- time(da_ts)[eotr] + (h+1) / freq
xlim = c(xmin, xmax)
ylim = c(0, 30)
cat(xmin, ";", xmax)
# Label 1: Actual Earning Line
plot(da_ts,
xlim = xlim,
ylim = ylim,
main = "JnJ QtlyEarning Multiplicative\nSeasonal Model Forecasting (Fig. 2.15)",
xlab = "Year", ylab = "QtlyEarning", lty=1
)
# Plot forecast mean (prepend the last observed data in the training dataset)
dummy_1st_fmean_ts <- ts(
c(c(da_ts[eotr]), as.numeric(e_qer_arr)),
frequency = freq, start = end(tr_da_ts)
)
# Label 2: NULL
lines(dummy_1st_fmean_ts)
# Label 3: Forecast Mean
points(dummy_1st_fmean_ts, pch = 1)
# Plot confidence interval (95%)
# 97.5% standard-normal quantile for a two-sided 95% interval.
t_stats = qnorm(p = 0.975, mean = 0, sd = 1)
# Bounds are formed on the log scale, then exponentiated.
dummy_1st_flower_ts <- ts(
exp(c(c(lg_da_ts[eotr]), e_lg_qer_arr)-t_stats*c(c(0), sqrt(v_qer_arr))), # lower CI for log-normal
frequency = freq, start = end(tr_da_ts)
)
dummy_1st_fupper_ts <- ts(
exp(c(c(lg_da_ts[eotr]), e_lg_qer_arr)+t_stats*c(c(0), sqrt(v_qer_arr))), # upper CI for log-normal
frequency = freq, start = end(tr_da_ts)
)
# Label 4: Forecast 95% Lower Bound
lines(dummy_1st_flower_ts, lty = 2)
# Label 5: Forecast 95% Upper Bound
lines(dummy_1st_fupper_ts, lty = 2)
# Plot original data
orig_plot_ts <- ts(
da_ts[(eotr - npts + 1):length(da_ts)],
frequency = freq, start = time(da_ts)[eotr] - (npts - 1) / freq
)
# Label 6: NULL
lines(orig_plot_ts)
# Label 7: Actual Earning
points(orig_plot_ts, pch = 19)
# NOTE: c() drops NULL, so the legend ends up with 5 entries matching the
# 5-element lty/pch vectors below.
legend(
"topleft",
legend = c(
"Actual Earning Line", NULL, "Forecast Mean", "Forecast 95% Lower Bound",
"Forecast 95% Upper Bound", NULL, "Actual Earning"
),
# col = c("black", "red", "blue"),
lty = c(1, NA, 2, 2, NA),
pch = c(NA, 1, NA, NA, 19)
)
Forecast: Directly apply multiplicative seasonal model¶
- The forecast mean from my calculation and the R package are close to each other.
- This gives a CI much closer to the textbook results than my calculation does. My calculation inflates the forecasting variance.
# Fit ARIMA(0,1,1)(0,1,1)[4] (multiplicative seasonal model) directly to
# ln(quarterly earnings) and forecast h steps ahead via the package helper.
length(jnj_lg_er_ts)
npts = 6  # in-sample points to show in the forecast figure
h = 8     # forecast horizon (quarters)
eotr = length(jnj_lg_er_ts)-h # end of the training sample
freq = 4  # quarterly data
order = c(0,1,1) # From the solution of 2-6
seasonal = list(order = order, period = freq)
fixed = NULL
include.mean = FALSE # spell out FALSE, not F (F is reassignable)
multi_seas_mod_jnj_ts = ts(jnj_lg_er_ts, frequency = freq, start = c(1960, 1))
multi_seas_mod_jnj_fc_res = plot_arima_forecast_fig(
da_ts=multi_seas_mod_jnj_ts, eotr=eotr, h=h, npts=npts, frequency=freq,
order=order, seasonal=seasonal, fixed=fixed, method='ML', include.mean=include.mean, transform.pars=TRUE,
main="Forecasts from ARIMA(0,1,1)(0,1,1)[4] with non-zero mean for\n(1-B)(1-B^4)ln(QtlyEarning) = (1-theta*B)(1-Theta*B^4)a_t of JnJ",
xlab="Year", ylab="QtlyEarning", ylim=c(2, 3.2)
)
summary(multi_seas_mod_jnj_fc_res)
# Combine the forecast results into a table (comb_forecast_res is a helper
# from the AFTSCode package) and expose the forecast mean column.
multi_seas_mod_jnj_fc_tb = comb_forecast_res(
multi_seas_mod_jnj_fc_res,
da_ts = multi_seas_mod_jnj_ts,
eotr=eotr,
freq=freq
)
multi_seas_mod_jnj_fc_df = as.data.frame(multi_seas_mod_jnj_fc_tb)
multi_seas_mod_jnj_fc_tb
multi_seas_mod_jnj_fc_df$Forecast
For comparison, the following are the expectations from my calculation (my calculation is close to the results from the package)
$`1`
2.58563748050113
$`2`
2.62707380269453
$`3`
2.61107565060245
$`4`
2.36451453244944
$`5`
2.72919356620702
$`6`
2.77062988840042
$`7`
2.75463173630834
$`8`
2.50807061815533
multi_seas_mod_jnj_fc_df$`Std. Error`^2
For comparison, the following are the variances from my calculation (my calculation somehow inflates the variance)
$`1`
0.00840857826352835
$`2`
0.0149193615519221
$`3`
0.0214301448403158
$`4`
0.0279409281287095
$`5`
0.0428324475174856
$`6`
0.0624633460377802
$`7`
0.0820942445580749
$`8`
0.101725143078369
par(bg = "white")
# Same figure as above, but using the package's forecast mean and interval
# instead of the hand-computed ones.
da_ts = jnj_er_ts
lg_da_ts = jnj_lg_er_ts
eotr = length(da_ts)-h
tr_da_ts <- ts(da_ts[1:eotr], frequency = freq, start = start(da_ts))
# x-range: npts in-sample points plus h forecast points around the end of
# the training sample.
xmin <- time(da_ts)[eotr] - (npts+1) / freq
xmax <- time(da_ts)[eotr] + (h+1) / freq
xlim = c(xmin, xmax)
ylim = c(0, 30)
cat(xmin, ";", xmax)
# Label 1: Actual Earning Line
plot(da_ts,
xlim = xlim,
ylim = ylim,
main = "JnJ QtlyEarning Multiplicative\nSeasonal Model Forecasting (Fig. 2.15)",
xlab = "Year", ylab = "QtlyEarning", lty=1
)
# Plot forecast mean (prepend the last observed data in the training dataset)
# Log-normal mean: exp(mean + variance/2) on the log scale.
multi_seas_e_qer_arr = exp(multi_seas_mod_jnj_fc_df$Forecast+((multi_seas_mod_jnj_fc_df$`Std. Error`)^2)/2)
dummy_1st_fmean_ts <- ts(
c(c(da_ts[eotr]), as.numeric(multi_seas_e_qer_arr)),
frequency = freq, start = end(tr_da_ts)
)
# Label 2: NULL
lines(dummy_1st_fmean_ts)
# Label 3: Forecast Mean
points(dummy_1st_fmean_ts, pch = 1)
# Plot confidence interval (95%)
# t_stats computed but unused below — the package's 95% bounds are used directly.
t_stats = qnorm(p = 0.975, mean = 0, sd = 1)
multi_seas_mod_jnj_fc_res$lower[,2]
# Exponentiate the package's log-scale 95% bounds (column 2 = 95% level).
dummy_1st_flower_ts <- ts(
exp(c(c(lg_da_ts[eotr]), multi_seas_mod_jnj_fc_res$lower[,2])), # lower CI for log-normal
frequency = freq, start = end(tr_da_ts)
)
dummy_1st_fupper_ts <- ts(
exp(c(c(lg_da_ts[eotr]), multi_seas_mod_jnj_fc_res$upper[,2])), # upper CI for log-normal
frequency = freq, start = end(tr_da_ts)
)
# Label 4: Forecast 95% Lower Bound
lines(dummy_1st_flower_ts, lty = 2)
# Label 5: Forecast 95% Upper Bound
lines(dummy_1st_fupper_ts, lty = 2)
# Plot original data
orig_plot_ts <- ts(
da_ts[(eotr - npts + 1):length(da_ts)],
frequency = freq, start = time(da_ts)[eotr] - (npts - 1) / freq
)
# Label 6: NULL
lines(orig_plot_ts)
# Label 7: Actual Earning
points(orig_plot_ts, pch = 19)
# NOTE: c() drops NULL, so the legend ends up with 5 entries matching the
# 5-element lty/pch vectors below.
legend(
"topleft",
legend = c(
"Actual Earning Line", NULL, "Forecast Mean", "Forecast 95% Lower Bound",
"Forecast 95% Upper Bound", NULL, "Actual Earning"
),
# col = c("black", "red", "blue"),
lty = c(1, NA, 2, 2, NA),
pch = c(NA, 1, NA, NA, 19)
)
CRSP Decile 1 Index¶
# CRSP Decile 1 monthly simple returns: January-effect dummy regression
# and seasonal ARIMA fits (Fig. 2.16). Use TRUE/FALSE, never T/F.
da = read.table("../AFTS_data/Ch02/m-deciles08.txt", header = TRUE)
da[1:5,]
crsp_decile_1_rtn = da$CAP1RET
crsp_decile_1_rtn_ts = ts(crsp_decile_1_rtn, frequency = 12, start = c(1970, 1))
jan = rep(c(1,rep(0,11)),39) # Create Jan dummy (39 years of monthly data)
m1 = lm(crsp_decile_1_rtn~jan)
summary(m1)
plot_time_fig(crsp_decile_1_rtn_ts, main = "Fig. 2.16(a)", xlab = "Year", ylab = "Simple Return")
plot_acf(da = crsp_decile_1_rtn, lag.max = 40, main = "Fig. 2.16(b)")
# Remove the January effect and re-examine the ACF of the adjusted returns.
crsp_decile_1_adj_rtn = ts(m1$residuals, frequency = 12, start = c(1970, 1))
plot_time_fig(crsp_decile_1_adj_rtn, main = "Fig. 2.16(c)", xlab = "Year", ylab = "Adjusted Return")
plot_acf(da = m1$residuals, lag.max = 40, main = "Fig. 2.16(d)")
# Multiplicative seasonal model as an alternative to the dummy regression.
m2 = arima(crsp_decile_1_rtn, order=c(1,0,0), seasonal=list(order=c(1,0,1), period=12))
summary(m2)
par(bg = "white")
tsdiag(m2,gof=36)
# Same model without the mean term.
m3 = arima(crsp_decile_1_rtn, order=c(1,0,0), seasonal=list(order=c(1,0,1), period=12), include.mean = FALSE)
summary(m3)
Sec. 2.9¶
Weekly 1yr and 3yr interest rate¶
# Load weekly 1-year and 3-year U.S. Treasury rates; header = TRUE (not T).
da1 = read.table("../AFTS_data/Ch02/w-gs1yr.txt", header = TRUE)
da3 = read.table("../AFTS_data/Ch02/w-gs3yr.txt", header = TRUE)
da1[1:5,]; da3[1:5,]
Exploration¶
# Weekly 1yr and 3yr rates: build level and first-difference series, then
# reproduce Figs. 2.17, 2.18 and 2.20.
r1 <- da1$rate
r3 <- da3$rate
freq <- 52
r1_ts <- ts(r1, frequency = freq, start = c(1962, 1))
r3_ts <- ts(r3, frequency = freq, start = c(1962, 1))
c1 <- diff(r1)
c3 <- diff(r3)
c1_ts <- ts(c1, frequency = freq, start = c(1962, 2))
c3_ts <- ts(c3, frequency = freq, start = c(1962, 2))
par(bg = "white")
# Levels of both rates on one plot (Fig. 2.17).
plot(r1_ts, main = "Weekly 1yr or 3yr U.S. Interest Rate (Fig. 2.17)", xlab = "Year", ylab = "Percent")
lines(r3_ts, lty = 2)
legend("topleft", legend = c("1yr rate", "3yr rate"), lty = c(1, 2))
# Weekly changes (Fig. 2.20).
plot(c1_ts, main = "Weekly 1yr U.S. Interest Rate Changes (Fig. 2.20(a))", xlab = "Year", ylab = "Percent")
plot(c3_ts, main = "Weekly 3yr U.S. Interest Rate Changes (Fig. 2.20(b))", xlab = "Year", ylab = "Percent")
par(bg = "white")
# Scatter plots: levels vs. levels and changes vs. changes (Fig. 2.18).
plot(x = r1, y = r3, main = "Fig. 2.18(a)", xlab = "1yr rate", ylab = "3yr rate")
plot(x = c1, y = c3, main = "Fig. 2.18(b)", xlab = "diff(1yr rate)", ylab = "diff(3yr rate)")
Fit models¶
- Strong serial correlation in ACF of residuals, indicating that there is a unit-root process.
- Also conducted ADF unit-root tests. The results show that null-hypothesis cannot be rejected.
- Null-hypothesis is that the time series has unit-root.
# Regress the 3yr rate on the 1yr rate in levels (Fig. 2.19). The residual
# ACF below shows strong persistence, suggesting a unit root.
m1 = lm(r3~r1)
summary(m1)
par(bg = 'white')
plot(m1$residuals, type = 'l', main = "Simple Linear Model (Fig. 2.19(a))", xlab = "Year")
plot_acf(da = m1$residuals, lag.max = 36, main = "Residuals ACF Plot (Fig. 2.19(b))")
pacf(x = m1$residuals, lag.max = 36, main = "Residuals PACF Plot")
help(adfTest)
# "lags=12" is chosen from PACF plot
# ADF test with constant + trend ("ct"); failing to reject the null means
# the series is consistent with a unit root.
adf_test_res_1 = adfTest(r1, lags = 12, type = c("ct"))
adf_test_res_3 = adfTest(r3, lags = 12, type = c("ct")) # 2.7.3. Trend-Stationary Time-Series
adf_test_res_1; adf_test_res_3
# Ljung-Box test on the level-regression residuals.
box_test_1 = Box.test(m1$residuals, lag = 12, type = 'Ljung')
box_test_1
- The serial correlation is largely reduced, but there is still some serial correlation in the ACF of the residuals, indicating that the model is not adequate. We need to model the remaining serial correlation in the residuals. The ACF plot shows strong serial correlation at lag 1, so we use an MA(1) process to model the error.
# Regress the weekly changes without an intercept (Fig. 2.21).
m2 = lm(c3~-1+c1) # No intercept
summary(m2)
plot_acf(da = m2$residuals, lag.max = 36, main = "Fig. 2.21(b)")
# "lags=12" is chosen from PACF plot
# ADF tests on the differenced series themselves.
adf_test_res_c1 = adfTest(c1, lags = 12, type = c("ct"))
adf_test_res_c3 = adfTest(c3, lags = 12, type = c("ct")) # 2.7.3. Trend-Stationary Time-Series
adf_test_res_c1; adf_test_res_c3
# Ljung-Box test on the difference-regression residuals.
box_test_2 = Box.test(m2$residuals, lag = 12, type = 'Ljung')
box_test_2
- The serial correlations in the ACF plots no longer exist. But the Box-Ljung test is still significant. Maybe we can improve the model further to reduce some serial correlation in the residuals.
# Regression with MA(1) errors: c3_t = gamma*c1_t + e_t, e_t ~ MA(1).
# Use FALSE, not the reassignable shorthand F.
m3 = arima(c3, order = c(0,0,1), xreg = c1, include.mean = FALSE)
# rsq = 1 - sum(m3$residuals^2)/sum(c3^2-mean(c3)) # B/c in the model we set `include.mean = FALSE`, so here when evaluating R-squared, we don't include the mean either.
rsq = 1 - sum(m3$residuals^2)/sum(c3^2)
summary(m3); rsq
plot_acf(da = m3$residuals, lag.max = 36, main = "ACF of residuals after taking difference\nand model error as a MA(1) process")
# ln(T) shown as a reference choice for the Ljung-Box lag.
log(length(c1))
box_test_3 = Box.test(m3$residuals, lag = 12, type = 'Ljung')
box_test_3
Sec. 2.10¶
Example of consistent estimation for covariance matrix (1yr and 3yr rates)¶
- For heteroskedasticity-consistent or heteroskedasticity&autocorrelation-consistent estimators.
# Reload the weekly rates and refit the difference regression (with
# intercept this time) to illustrate robust covariance estimation.
# header = TRUE rather than the reassignable shorthand T.
da1 = read.table("../AFTS_data/Ch02/w-gs1yr.txt", header = TRUE)
da3 = read.table("../AFTS_data/Ch02/w-gs3yr.txt", header = TRUE)
da1[1:5,]; da3[1:5,]
r1 = da1$rate
r3 = da3$rate
freq = 52
r1_ts = ts(r1, frequency = freq, start = c(1962, 1))
r3_ts = ts(r3, frequency = freq, start = c(1962, 1))
c1 = diff(r1)
c3 = diff(r3)
c1_ts = ts(c1, frequency = freq, start = c(1962, 2))
c3_ts = ts(c3, frequency = freq, start = c(1962, 2))
m1 <- lm(c3~c1)
summary(m1)
- HC estimator
- GPT: R use white correction in OLS summary
help(vcovHC)
# Applying White's correction
# `type` parameter 'arg' should be one of “HC3”, “const”, “HC”, “HC0”, “HC1”, “HC2”, “HC4”, “HC4m”, “HC5”
hc1_robust_se <- vcovHC(m1, type = "HC1") # HC1 is often recommended; it's a variant of White's correction
hc1_robust_se
help(coeftest)
# Getting the summary with corrected standard errors
summary_with_hc1_robust_se <- coeftest(m1, hc1_robust_se)
# NOTE(review): coeftest(m1, vcovHC) uses vcovHC's default `type` —
# confirm which type that is in the installed sandwich version.
coeftest(m1, vcovHC);
coeftest(m1, vcovHC, type = "HC0");
print(summary_with_hc1_robust_se);
# Residual diagnostics: Durbin-Watson (autocorrelation), Jarque-Bera
# (normality), Ljung-Box (serial correlation up to lag 33).
dwtest(m1);
jarque.bera.test(m1$residuals);
Box.test(m1$residuals, lag = 33, type = 'Ljung')
- HAC estimator
- GPT: Use heteroscedasticity and autocorrelation consistent (HAC) estimator in R
help(vcovHAC)
# Compute HAC standard errors
hac_se <- vcovHAC(m1)
hac_se
# Summary of the model with HAC standard errors
# Doesn't have effect
# NOTE(review): the `robust` argument appears to be silently ignored here,
# so the printed SEs are still the OLS ones — confirm against summary.lm.
summary(m1, robust = hac_se)
# Alternatively, you can directly use coeftest() from lmtest to test coefficients with HAC standard errors
coeftest(m1, vcov. = hac_se)
- The following call gives an answer closer to the one on p. 100 of the textbook.
coeftest(m1, NeweyWest(m1, lag = 12, prewhite = F)) # From the solution to 2-7
- Including lag in linear regression
- GPT: R linear model using lag operator
help(merge)
# Create a zoo object for the time series
c3_zoo <- zoo(c3)
c1_zoo <- zoo(c1)
# Lag the series by 1 period
# lag(x, -1) shifts the series back one step; na.pad keeps the series
# aligned by padding with NA.
c3_lag1_zoo <- lag(c3_zoo, -1, na.pad=TRUE)
c1_lag1_zoo <- lag(c1_zoo, -1, na.pad=TRUE)
# Combine the original and lagged series
c3_c1_zoo <- merge(c3_zoo, c1_zoo, c3_lag1_zoo, c1_lag1_zoo, all = TRUE)
dim(c3_c1_zoo); c3_c1_zoo[1:5,]
help(lm)
# Fit a linear model
# Regress c3_t on c1_t plus the lag-1 terms; na.exclude keeps residuals
# aligned with the NA-padded input rows.
model_zoo <- lm(c3_zoo ~ c1_zoo+c3_lag1_zoo+c1_lag1_zoo, data = c3_c1_zoo, na.action = na.exclude)
# View the summary of the model
summary(model_zoo)
# Residual diagnostics: Durbin-Watson, Jarque-Bera, Ljung-Box.
dwtest(model_zoo);
jarque.bera.test(model_zoo$residuals);
Box.test(model_zoo$residuals, lag = 33, type = 'Ljung')
Prove that the equation on pp101 is the $j^{th}$ diagonal element of the equation (2.49).¶
From equation (2.49) $$ \begin{eqnarray} Cov(\hat{\boldsymbol{\beta}})_{HC} = \left[\sum_{t=1}^T \mathbf{x}_t\mathbf{x}_t'\right]^{-1} \left[\sum_{t=1}^T \hat{e}_t^2\mathbf{x}_t\mathbf{x}_t'\right] \left[\sum_{t=1}^T \mathbf{x}_t\mathbf{x}_t'\right]^{-1} \end{eqnarray} $$
The equation on pp101 $$ Var(\hat{\beta}_j)_{HC} = \frac{\sum_{t=1}^T\hat{e}_t^2\hat{v}_t^2}{\left(\sum_{t=1}^T\hat{v}_t^2\right)^2} $$
where $\hat{v}_t$ is the least-squares residual of the auxiliary regression $x_{jt}=\mathbf{x}_{-j,t}'\boldsymbol{\gamma}+v_t, t=1,\cdots,T$.
- Basically, we want to prove that $$ \begin{eqnarray} \mathbf{e}_j'\left[\sum_{t=1}^T \mathbf{x}_t\mathbf{x}_t'\right]^{-1} \left[\sum_{t=1}^T \hat{e}_t^2\mathbf{x}_t\mathbf{x}_t'\right] \left[\sum_{t=1}^T \mathbf{x}_t\mathbf{x}_t'\right]^{-1}\mathbf{e}_j = \frac{\sum_{t=1}^T\hat{e}_t^2\hat{v}_t^2}{\left(\sum_{t=1}^T\hat{v}_t^2\right)^2} \end{eqnarray} $$
where $\mathbf{e}_j$ is the unit vector with $j^{th}$ element being $1$ and other element being $0$.
Before proving the general case, we can first check if this is true when all the columns of $\mathbf{X}$ are orthogonal.
We can write $\left[\sum_{t=1}^T\mathbf{x}_t\mathbf{x}_t'\right]=\mathbf{X}'\mathbf{X}$ as a block matrix and use the formula of inversion of a block matrix to prove this.
Sec. 2.11¶
Daily simple return of value-&equal-weighted index from CRSP¶
# Daily simple returns of CRSP value- and equal-weighted indexes: ACFs of
# absolute and squared returns (Fig. 2.22) reveal long-lived dependence.
# Use TRUE/FALSE rather than the reassignable shorthands T/F.
da_daily = read.table("../AFTS_data/Ch02/d-ibm3dx7008.txt", header = TRUE)
vwretd = da_daily$vwretd
ewretd = da_daily$ewretd
par(bg = "white")
plot(acf(abs(vwretd), lag.max = 300, plot = FALSE), main = "ACF of abs(vwretd) (Fig. 2.22(a))")
plot(acf(abs(ewretd), lag.max = 300, plot = FALSE), main = "ACF of abs(ewretd) (Fig. 2.22(b))")
par(bg = "white")
plot(acf(vwretd^2, lag.max = 300, plot = FALSE), main = "ACF of vwretd^2")
plot(acf(ewretd^2, lag.max = 300, plot = FALSE), main = "ACF of ewretd^2")